---
title: "gender_interviews"
author: "Maggie Kenney"
date: "2024-04-03"
output: html_document
---

```{r setup, include=FALSE}
# Show the source of every chunk in the knitted document by default
knitr::opts_chunk$set(echo = TRUE)
# Packages attached for the analysis below
library(dplyr)
library(ggplot2)
# estimatr supplies lm_robust(), used for the regressions with robust SEs
library(estimatr)
library(stats)
# modelsummary() writes the regression tables to .tex files
library(modelsummary)
library(purrr)
library(tidyverse)
library(magrittr)
library(sandwich)
# coefplot supplies multiplot(), used for the coefficient plots
library(coefplot)
# kableExtra supplies kable_styling() and save_kable() for Table 1
library(kableExtra)
library(knitr)
```

```{r}
# Folder that holds the three replication CSVs. Edit this one line so that it
# points at your local copy of the data.
data_dir <- "Set Directory"

# Paths are built explicitly rather than via setwd(): knitr restores the
# working directory at the end of each chunk, so a setwd() here would not
# persist into later chunks when knitting. Output files written further down
# (tables/figures) therefore land in the current working directory.
full_sample <- read.csv(file.path(data_dir, "kenney_salchak_audit.csv"))
firm_sample <- read.csv(file.path(data_dir, "firm_sample.csv"))
oecd_sample <- read.csv(file.path(data_dir, "oecd_sample.csv"))
```
# Table 1: Overall Effect Size
```{r}
# Table 1: scheduling rate by sender (treat == 0 is Mary Williams, treat == 1
# is Jake Miller), for the full sample and split by `gender` of the
# interviewee (gender == 1 is labelled male, gender == 0 female), written to
# table1.tex.

# As pre-registered, we exclude those whose emails did not deliver
full <- full_sample %>%
  filter(delivered == 1)

# ---- Helpers ----------------------------------------------------------------

# Mean of `scheduled` times 100, i.e. a percentage when `scheduled` is a 0/1
# indicator.
scheduling_rate <- function(df) {
  mean(df$scheduled) * 100
}

# Mary-minus-Jake comparison for one pair of subsamples. Returns a length-4
# numeric vector: difference in scheduling rates (percentage points), the
# t-test p-value, and the lower and upper bound of the 90% confidence interval.
# A single t.test() call is enough: the p-value does not depend on conf.level,
# only the reported interval does.
compare_senders <- function(mary_df, jake_df) {
  welch <- t.test(mary_df$scheduled, jake_df$scheduled, conf.level = 0.9)
  c(
    scheduling_rate(mary_df) - scheduling_rate(jake_df),
    welch$p.value,
    welch$conf.int
  )
}

# "<rate>% (N=<n>)" cell for one sender.
format_rate <- function(rate, n) {
  paste0(round(rate, 1), "% (N=", n, ")")
}

# The four display cells of one table column: Mary, Jake, differential with
# p-value, and the 90% CI. `comparison` is a vector from compare_senders().
format_column <- function(mary_rate, mary_n, jake_rate, jake_n, comparison) {
  c(
    format_rate(mary_rate, mary_n),
    format_rate(jake_rate, jake_n),
    paste0(round(comparison[1], 1), "% (p=", round(comparison[2], 3), ")"),
    paste0("[", round(comparison[3], 2), ",", round(comparison[4], 2), "]")
  )
}

# ---- Subsamples, counts and rates -------------------------------------------

# Jake Miller, all interviewees
total_responses_jake <- full %>%
  filter(treat == 1)
jake_n <- as.numeric(nrow(total_responses_jake))
jake_rrate <- scheduling_rate(total_responses_jake)

# Mary Williams, all interviewees
total_responses_mary <- full %>%
  filter(treat == 0)
mary_n <- as.numeric(nrow(total_responses_mary))
mary_rrate <- scheduling_rate(total_responses_mary)

# Jake Miller, male interviewees
male_responses_jake <- total_responses_jake %>%
  filter(gender == 1)
jake_male_n <- as.numeric(nrow(male_responses_jake))
jake_male_rrate <- scheduling_rate(male_responses_jake)

# Jake Miller, female interviewees
female_responses_jake <- total_responses_jake %>%
  filter(gender == 0)
jake_female_n <- as.numeric(nrow(female_responses_jake))
jake_female_rrate <- scheduling_rate(female_responses_jake)

# Mary Williams, male interviewees
male_responses_mary <- total_responses_mary %>%
  filter(gender == 1)
mary_male_n <- as.numeric(nrow(male_responses_mary))
mary_male_rrate <- scheduling_rate(male_responses_mary)

# Mary Williams, female interviewees
female_responses_mary <- total_responses_mary %>%
  filter(gender == 0)
mary_female_n <- as.numeric(nrow(female_responses_mary))
mary_female_rrate <- scheduling_rate(female_responses_mary)

# ---- Gender differentials, p-values and CIs ---------------------------------

# Rows: 1 = differential (pct. points), 2 = p-value, 3/4 = 90% CI bounds.
# NOTE(review): the CI bounds come straight from t.test() on the raw
# `scheduled` values, so they are on the 0-1 scale while the differential is
# multiplied by 100. Left unchanged so the table reproduces as before; confirm
# the mixed units are intended.
gender_diff <- data.frame(
  Full_Sample = compare_senders(total_responses_mary, total_responses_jake),
  Male_Interviewees = compare_senders(male_responses_mary, male_responses_jake),
  Female_Interviewees = compare_senders(
    female_responses_mary,
    female_responses_jake
  )
)

# ---- Display table -----------------------------------------------------------

table_data <- data.frame(
  Treatment = c(
    "Mary Williams",
    "Jake Miller",
    "Gender Differential",
    "90% CI"
  ),
  Full_Sample = format_column(
    mary_rrate, mary_n,
    jake_rrate, jake_n,
    gender_diff$Full_Sample
  ),
  Male_Interviewees = format_column(
    mary_male_rrate, mary_male_n,
    jake_male_rrate, jake_male_n,
    gender_diff$Male_Interviewees
  ),
  Female_Interviewees = format_column(
    mary_female_rrate, mary_female_n,
    jake_female_rrate, jake_female_n,
    gender_diff$Female_Interviewees
  )
)

# Render as LaTeX and write to disk
table <- kable(
  table_data,
  format = "latex",
  caption = "Overall Effect Size: Interview Scheduling Rate",
  align = "c"
) %>%
  kable_styling(latex_options = "HOLD_position")
save_kable(table, file = "table1.tex")
```

# Figure 1: Coefficient Plot for Hypothesis 1
```{r}
# Figure 1 and its accompanying regression table (Table 5 in the Appendix).
# Uses `full`, the delivered-only full sample built in the Table 1 chunk.

# as pre-registered, filter to emails that delivered
oecd <- oecd_sample %>%
  filter(delivered == 1)

firm <- firm_sample %>%
  filter(delivered == 1)

# difference of means: interviews scheduled, one regression per sample
interview_full <- lm_robust(scheduled ~ treat, data = full)
interview_firm <- lm_robust(scheduled ~ treat, data = firm)
interview_oecd <- lm_robust(scheduled ~ treat, data = oecd)

scheduled_sample <- list(interview_full, interview_firm, interview_oecd)

# Build the plot as an object and save it with ggsave() instead of relying on
# auto-printing between pdf() and dev.off(): auto-printing does not happen
# when this code is source()d or wrapped in a function (which would leave an
# empty PDF), and an error mid-plot would leave the device open.
# innerCI / outerCI are the normal critical values for 90% and 95% intervals.
figure1 <- multiplot(
  scheduled_sample,
  xlab = "Change in Probability Interview is Scheduled",
  ylab = "Sample",
  intercept = FALSE,
  innerCI = 1.645,
  outerCI = 1.96,
  lwdInner = 2,
  lwdOuter = 1
) +
  theme_classic() +
  theme(
    axis.title.x = element_text(size = 20),
    axis.title.y = element_text(size = 20),
    legend.title = element_blank(),
    legend.text = element_text(size = 20),
    axis.text.y = element_blank(),
    plot.title = element_blank(),
    axis.text.x = element_text(size = 20)
  )

# Device is inferred from the .pdf extension; width/height are in inches
ggsave("figure1.pdf", plot = figure1, width = 8, height = 6)

# Coefficient labels for the table. This name is reassigned with a different
# mapping in the Table 2 chunk, so re-run this chunk before regenerating
# table5.tex if chunks are executed out of order.
cm_match <- c('(Intercept)' = 'Constant', treat = "Jake Miller")

# Goodness-of-fit rows to report
gof <- c("nobs", "r.squared")

# Accompanying Regression Table for Figure 1; Table 5 in Appendix
modelsummary(
  scheduled_sample,
  output = "table5.tex",
  coef_omit = "Intercept",
  caption = "Figure 1 Regression Table",
  coef_map = cm_match,
  gof_map = gof,
  fmt = 3,
  stars = c('*' = .1, '**' = 0.05, '***' = 0.01)
)
```

# Figure 2 - Hypothesis 2
```{r}
# Figure 2 and its accompanying regression table (table6.tex).
# Uses `full`, the delivered-only full sample built in the Table 1 chunk.

# Probit models of each outcome on interviewee gender and treatment
interview_gender <- glm(
  scheduled ~ gender + treat,
  family = binomial(link = "probit"),
  data = full
)
response_gender <- glm(
  respond ~ gender + treat,
  family = binomial(link = "probit"),
  data = full
)

models_gender <- list(
  "Interview Scheduled" = interview_gender,
  "Email Response" = response_gender
)

# Build the plot as an object and save it with ggsave() instead of relying on
# auto-printing between pdf() and dev.off(): auto-printing does not happen
# when this code is source()d or wrapped in a function (which would leave an
# empty PDF), and an error mid-plot would leave the device open.
# Only the `gender` coefficient is drawn; innerCI / outerCI are the normal
# critical values for 90% and 95% intervals.
# NOTE(review): the plotted values are probit coefficients, which are not
# themselves changes in probability, yet the x-axis label says "Change in
# Probability". Confirm whether marginal effects or a linear probability
# model were intended.
figure2 <- multiplot(
  models_gender,
  xlab = "Change in Probability Outcome Variable",
  ylab = "Outcome Variable",
  coefficients = "gender",
  innerCI = 1.645,
  outerCI = 1.96,
  lwdInner = 2,
  lwdOuter = 1,
  intercept = FALSE
) +
  theme_classic() +
  scale_color_brewer(palette = "Set1") +
  theme(
    axis.title.x = element_text(size = 20),
    axis.title.y = element_text(size = 20),
    axis.text.x = element_text(size = 20),
    axis.text.y = element_blank(),
    plot.title = element_blank(),
    legend.title = element_blank(),
    legend.text = element_text(size = 20)
  )

# Device is inferred from the .pdf extension; width/height are in inches
ggsave("figure2.pdf", plot = figure2, width = 8, height = 5)

# Accompanying Regression Table for Figure 2
cm_gender <- c(
  '(Intercept)' = 'Constant',
  gender = "Elite Gender",
  treat = "Treatment Assignment"
)

# Defined here (same value as in the other table chunks) so this chunk does
# not depend on the Figure 1 chunk having been run first.
# NOTE(review): "r.squared" may not be available for glm fits, in which case
# only the number of observations is reported. Confirm against table6.tex.
gof <- c("nobs", "r.squared")

modelsummary(
  models_gender,
  output = "table6.tex",
  coef_omit = "Intercept",
  caption = "Figure 2 Regression Table",
  coef_map = cm_gender,
  gof_map = gof,
  fmt = 3,
  stars = c('*' = .1, '**' = 0.05, '***' = 0.01)
)
```
# Table 2 - Hypothesis 3
```{r}
# Table 2: each outcome regressed on gender_match, gender and their
# interaction, fitted on `full` and written to table2.tex.

# Row labels (in display order) and goodness-of-fit rows for the table
gof <- c("nobs", "r.squared")
cm_match <- c(
  "(Intercept)" = "Constant",
  "gender_match" = "Matched Gender",
  "gender" = "Interviewee Gender",
  "gender_match:gender" = "Interaction Effect"
)

# Significance thresholds used for the stars
significance_stars <- c("*" = .1, "**" = 0.05, "***" = 0.01)

# `gender_match * gender` expands to both main effects plus the interaction
response_match <- lm_robust(respond ~ gender_match * gender, data = full)
interview_match <- lm_robust(scheduled ~ gender_match * gender, data = full)

# Column order in the table follows the order of this list
models_match <- list(
  "Interview Scheduled" = interview_match,
  "Email Response" = response_match
)

modelsummary(
  models_match,
  output = "table2.tex",
  coef_omit = "Intercept",
  caption = "Interaction of Treatment and Interviewee Gender",
  coef_map = cm_match,
  gof_map = gof,
  fmt = 3,
  stars = significance_stars
)
```
